# Project identity.
BOT_NAME = "nirsoft_crawler"

# One package holds every spider: it is the module named for newly created
# spiders and the only entry in the list of modules searched for spiders.
NEWSPIDER_MODULE = "nirsoft_crawler.spiders"
SPIDER_MODULES = [NEWSPIDER_MODULE]

# Download path configuration.
# Absolute, machine-specific Windows path; written as a raw string so the
# backslashes are taken literally rather than as escape sequences.
FILES_STORE = r"D:\爬虫\课程设计\下载"

# Pipeline configuration.
# Maps each pipeline's dotted import path to its integer order value; per the
# Scrapy documentation, items pass through lower-valued pipelines first.
ITEM_PIPELINES = {
    "nirsoft_crawler.pipelines.DownloadZipPipeline": 300,
    "nirsoft_crawler.pipelines.ZipFilePipeline": 400,
}

# Request header configuration.
# Browser-like default headers; the User-Agent identifies as desktop Chrome 116
# on 64-bit Windows 10. The long literal is split across adjacent string pieces
# purely for line length -- the resulting value is a single string.
DEFAULT_REQUEST_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/116.0.0.0 Safari/537.36"
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9",
    "Connection": "keep-alive",
}

# Download settings.
# robots.txt handling is switched off for the testing phase.
ROBOTSTXT_OBEY = False
# Delay applied between requests (Scrapy's DOWNLOAD_DELAY is expressed in seconds).
DOWNLOAD_DELAY = 2

# Encoding settings.
# Feed exports are written as UTF-8.
FEED_EXPORT_ENCODING = "utf-8"
# NOTE(review): DEFAULT_RESPONSE_ENCODING does not appear to be one of Scrapy's
# documented built-in settings -- confirm whether project code reads it;
# otherwise it likely has no effect.
DEFAULT_RESPONSE_ENCODING = "utf-8"

# Disable the compression middleware.
# Per the Scrapy documentation, mapping a middleware's import path to None
# turns that middleware off.
DOWNLOADER_MIDDLEWARES = {
    "scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware": None,
}

# Silence the deprecation warning by pinning the request fingerprinter
# implementation explicitly.
REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"

# Limit the number of downloads: the value is the item count used by Scrapy's
# CLOSESPIDER_ITEMCOUNT setting to decide when to close the spider.
CLOSESPIDER_ITEMCOUNT = 6